# 01_political_knowledge_lca.R
# Created: 2020-1-10 Taka-aki Asano
# Last Modified: 2023-2-19

# package
require("readr")
require("dplyr")
require("tidyr")
require("stringr")
require("ggplot2")
require("ggpubr")
require("poLCA")
quartzFonts(HiraKaku = quartzFont(rep('Hiragino Kaku Gothic Pro W3', 4)))
theme_set(theme_classic(base_size = 12, base_family = 'HiraKaku'))


# function to calculate cAIC
cAIC <- function(res) {
  return((-2 * res$llik) + ((log(res$N) + 1) * res$npar))
}


# read dataset
voter2019 <- read_csv("replication_dataset.csv", locale = locale(encoding="UTF8"))


# answer
voter2019 <- mutate(
  voter2019, 
  KNOWLEDGE_1A = recode_factor(KNOWLEDGE_1, `1` = "Incorrect", `2` = "Correct", `3` = "DK"), 
  KNOWLEDGE_2A = recode_factor(KNOWLEDGE_2, `1` = "Incorrect", `2` = "Correct", `3` = "DK"), 
  KNOWLEDGE_3A = recode_factor(KNOWLEDGE_3, `2` = "Incorrect", `1` = "Correct", `3` = "DK"), 
  KNOWLEDGE_4A = recode_factor(KNOWLEDGE_4, `1` = "Incorrect", `2` = "Correct", `3` = "DK"), 
  KNOWLEDGE_5A = recode_factor(KNOWLEDGE_5, `1` = "Incorrect", `2` = "Correct", `3` = "DK"), 
  KNOWLEDGE_6A = recode_factor(KNOWLEDGE_6, `1` = "Incorrect", `2` = "Correct", `3` = "DK"), 
  KNOWLEDGE_7A = recode_factor(KNOWLEDGE_7, `2` = "Incorrect", `1` = "Correct", `3` = "DK"), 
  KNOWLEDGE_8A = recode_factor(KNOWLEDGE_8, `1` = "Incorrect", `2` = "Correct", `3` = "DK"), 
  KNOWLEDGE_9A = recode_factor(KNOWLEDGE_9, `1` = "Incorrect", `2` = "Correct", `3` = "DK"), 
  KNOWLEDGE_10A = recode_factor(KNOWLEDGE_10, `1` = "Incorrect", `2` = "Correct", `3` = "DK")
)
prop.table(table(voter2019$KNOWLEDGE_1A))
prop.table(table(voter2019$KNOWLEDGE_2A))
prop.table(table(voter2019$KNOWLEDGE_3A))
prop.table(table(voter2019$KNOWLEDGE_4A))
prop.table(table(voter2019$KNOWLEDGE_5A))
prop.table(table(voter2019$KNOWLEDGE_6A))
prop.table(table(voter2019$KNOWLEDGE_7A))
prop.table(table(voter2019$KNOWLEDGE_8A))
prop.table(table(voter2019$KNOWLEDGE_9A))
prop.table(table(voter2019$KNOWLEDGE_10A))


# run LCA
f <- cbind(KNOWLEDGE_1A, KNOWLEDGE_2A, KNOWLEDGE_3A, KNOWLEDGE_4A, KNOWLEDGE_5A, 
           KNOWLEDGE_6A, KNOWLEDGE_7A, KNOWLEDGE_8A, KNOWLEDGE_9A, KNOWLEDGE_10A) ~ 1
select <- data.frame(Class = rep(2:7 , times = 2), 
                     Index = rep(c("cAIC", "BIC"), each = 6), 
                     Value = rep(NA, times = 12))
for (i in 2:7) {
  res <- poLCA(f, data = voter2019, nclass = i, 
               maxiter = 6000, nrep = 10, verbose = FALSE)
  select$Value[select$Index == "cAIC" & select$Class == i] <- cAIC(res)
  select$Value[select$Index == "BIC" & select$Class == i] <- res$bic
}

## plot cAIC, BIC
index_plot <- ggplot(select, aes(x = Class, y = Value, group = Index)) + 
  geom_line() + geom_point() + facet_wrap(~ Index) + 
  labs(x = "Number of Classes", y = "Value") + 
  theme(plot.title = element_blank())
plot(index_plot)

## fix nclass
set.seed(1)
res <- poLCA(f, voter2019, nclass = 4, maxiter = 6000, nrep = 10, verbose = FALSE)
res$P ## ratio


# plot probability
prob <- data.frame(
  Variables = rep(c("Deputy Prime Minister", "Revision of The Constitution", 
                    "Electoral System", "New Bank Notes", 
                    "Sales Tax", "UN Secretary‐General", 
                    "ICJ", "Whale Conservation", 
                    "Hong Kong Protests", "Japan–Korea Disputes"), 
                  each = 12), 
  Class = rep(rep(paste("Class", 1:4), each = 3), times = 10), 
  Response = rep(c("Incorrect", "Correct", "DK"), times = 40), 
  Probability = c(res$probs$KNOWLEDGE_1A[1,], res$probs$KNOWLEDGE_1A[2,], 
                  res$probs$KNOWLEDGE_1A[3,], res$probs$KNOWLEDGE_1A[4,], 
                  res$probs$KNOWLEDGE_2A[1,], res$probs$KNOWLEDGE_2A[2,], 
                  res$probs$KNOWLEDGE_2A[3,], res$probs$KNOWLEDGE_2A[4,], 
                  res$probs$KNOWLEDGE_3A[1,], res$probs$KNOWLEDGE_3A[2,], 
                  res$probs$KNOWLEDGE_3A[3,], res$probs$KNOWLEDGE_3A[4,], 
                  res$probs$KNOWLEDGE_4A[1,], res$probs$KNOWLEDGE_4A[2,], 
                  res$probs$KNOWLEDGE_4A[3,], res$probs$KNOWLEDGE_4A[4,], 
                  res$probs$KNOWLEDGE_5A[1,], res$probs$KNOWLEDGE_5A[2,], 
                  res$probs$KNOWLEDGE_5A[3,], res$probs$KNOWLEDGE_5A[4,], 
                  res$probs$KNOWLEDGE_6A[1,], res$probs$KNOWLEDGE_6A[2,], 
                  res$probs$KNOWLEDGE_6A[3,], res$probs$KNOWLEDGE_6A[4,], 
                  res$probs$KNOWLEDGE_7A[1,], res$probs$KNOWLEDGE_7A[2,], 
                  res$probs$KNOWLEDGE_7A[3,], res$probs$KNOWLEDGE_7A[4,], 
                  res$probs$KNOWLEDGE_8A[1,], res$probs$KNOWLEDGE_8A[2,], 
                  res$probs$KNOWLEDGE_8A[3,], res$probs$KNOWLEDGE_8A[4,], 
                  res$probs$KNOWLEDGE_9A[1,], res$probs$KNOWLEDGE_9A[2,], 
                  res$probs$KNOWLEDGE_9A[3,], res$probs$KNOWLEDGE_9A[4,], 
                  res$probs$KNOWLEDGE_10A[1,], res$probs$KNOWLEDGE_10A[2,], 
                  res$probs$KNOWLEDGE_10A[3,], res$probs$KNOWLEDGE_10A[4,]), 
  SE = c(res$probs.se$KNOWLEDGE_1A[1,], res$probs.se$KNOWLEDGE_1A[2,], 
         res$probs.se$KNOWLEDGE_1A[3,], res$probs.se$KNOWLEDGE_1A[4,], 
         res$probs.se$KNOWLEDGE_2A[1,], res$probs.se$KNOWLEDGE_2A[2,], 
         res$probs.se$KNOWLEDGE_2A[3,], res$probs.se$KNOWLEDGE_2A[4,], 
         res$probs.se$KNOWLEDGE_3A[1,], res$probs.se$KNOWLEDGE_3A[2,], 
         res$probs.se$KNOWLEDGE_3A[3,], res$probs.se$KNOWLEDGE_3A[4,], 
         res$probs.se$KNOWLEDGE_4A[1,], res$probs.se$KNOWLEDGE_4A[2,], 
         res$probs.se$KNOWLEDGE_4A[3,], res$probs.se$KNOWLEDGE_4A[4,], 
         res$probs.se$KNOWLEDGE_5A[1,], res$probs.se$KNOWLEDGE_5A[2,], 
         res$probs.se$KNOWLEDGE_5A[3,], res$probs.se$KNOWLEDGE_5A[4,], 
         res$probs.se$KNOWLEDGE_6A[1,], res$probs.se$KNOWLEDGE_6A[2,], 
         res$probs.se$KNOWLEDGE_6A[3,], res$probs.se$KNOWLEDGE_6A[4,], 
         res$probs.se$KNOWLEDGE_7A[1,], res$probs.se$KNOWLEDGE_7A[2,], 
         res$probs.se$KNOWLEDGE_7A[3,], res$probs.se$KNOWLEDGE_7A[4,], 
         res$probs.se$KNOWLEDGE_8A[1,], res$probs.se$KNOWLEDGE_8A[2,], 
         res$probs.se$KNOWLEDGE_8A[3,], res$probs.se$KNOWLEDGE_8A[4,], 
         res$probs.se$KNOWLEDGE_9A[1,], res$probs.se$KNOWLEDGE_9A[2,], 
         res$probs.se$KNOWLEDGE_9A[3,], res$probs.se$KNOWLEDGE_9A[4,], 
         res$probs.se$KNOWLEDGE_10A[1,], res$probs.se$KNOWLEDGE_10A[2,], 
         res$probs.se$KNOWLEDGE_10A[3,], res$probs.se$KNOWLEDGE_10A[4,])
)
prob$Variables <- factor(
  prob$Variables, 
  levels = rev(c("Deputy Prime Minister", "Revision of The Constitution", 
                 "Electoral System", "New Bank Notes", 
                 "Sales Tax", "UN Secretary‐General", 
                 "ICJ", "Whale Conservation", 
                 "Hong Kong Protests", "Japan–Korea Disputes")))

## correct answer
prob_correct_plot <- ggplot(prob[prob$Response == "Correct",], 
                            aes(x = Variables, y = Probability, group = Class, 
                                ymin = Probability - 1.96 * SE, 
                                ymax = Probability + 1.96 * SE)) + 
  geom_hline(yintercept = 0.5, linetype = "dashed", 
             color = "grey74", size = 1.1) + 
  geom_pointrange(size = 0.3) + ylim(-0.3, 1.3) + 
  geom_text(aes(label = round(Probability, 2)), size = 3.5, vjust = -0.8) + 
  facet_wrap(Class ~ ., ncol = 4) + coord_flip() + 
  labs(title = "(a) Probability of Correct Answer", x = "Question", y = "Probability") + 
  theme(plot.title = element_text(hjust = 0.5), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12), 
        strip.text = element_text(size = 12))
plot(prob_correct_plot)

## incorrect answer
prob_incorrect_plot <- ggplot(prob[prob$Response == "Incorrect",], 
                              aes(x = Variables, y = Probability, group = Class, 
                                  ymin = Probability - 1.96 * SE, 
                                  ymax = Probability + 1.96 * SE)) + 
  geom_hline(yintercept = 0.5, linetype = "dashed", 
             color = "grey74", size = 1.1) + 
  geom_pointrange(size = 0.3) + ylim(-0.3, 1.3) + 
  geom_text(aes(label = round(Probability, 2)), size = 3.5, vjust = -0.8) + 
  facet_wrap(Class ~ ., ncol = 4) + coord_flip() + 
  labs(title = "(b) Probability of Incorrect Answer", x = "Question", y = "Probability") + 
  theme(plot.title = element_text(hjust = 0.5), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12), 
        strip.text = element_text(size = 12))
plot(prob_incorrect_plot)

## DK
prob_dk_plot <- ggplot(prob[prob$Response == "DK",], 
                       aes(x = Variables, y = Probability, group = Class, 
                           ymin = Probability - 1.96 * SE, 
                           ymax = Probability + 1.96 * SE)) + 
  geom_hline(yintercept = 0.5, linetype = "dashed", 
             color = "grey74", size = 1.1) + 
  geom_pointrange(size = 0.3) + ylim(-0.3, 1.3) + 
  geom_text(aes(label = round(Probability, 2)), size = 3.5, vjust = -0.8) + 
  facet_wrap(Class ~ ., ncol = 4) + coord_flip() + 
  labs(title = "(c) Probability of DK", x = "Question", y = "Probability") + 
  theme(plot.title = element_text(hjust = 0.5), 
        axis.text.x = element_text(size = 12), 
        axis.text.y = element_text(size = 12), 
        strip.text = element_text(size = 12))
plot(prob_dk_plot)


# merge with original data
voter2019$Knowledge_Class <- paste("Class", res$predclass, sep = "") %>% 
  factor(., levels = c("Class1", "Class2", "Class3", "Class4"))
prop.table(table(voter2019$Knowledge_Class))
voter2019 <- res$posterior %>% 
  as.data.frame() %>% 
  rename(Class1 = V1, Class2 = V2, Class3 = V3, Class4 = V4) %>% 
  cbind(voter2019, .)


# plot density
group1_density <- ggplot(voter2019, aes(x = Class1, y = ..density..)) + 
  geom_histogram(bins = 10, color = "black", fill = "white") + 
  labs(x = "Probability", y = "Density", title = "Class 1") + 
  scale_y_continuous(expand = c(0, 0), limits = c(0, 7)) + 
  theme(plot.title = element_text(hjust = 0.5))
group2_density <- ggplot(voter2019, aes(x = Class2, y = ..density..)) + 
  geom_histogram(bins = 10, color = "black", fill = "white") + 
  labs(x = "Probability", y = "Density", title = "Class 2") + 
  scale_y_continuous(expand = c(0, 0), limits = c(0, 7)) + 
  theme(plot.title = element_text(hjust = 0.5))
group3_density <- ggplot(voter2019, aes(x = Class3, y = ..density..)) + 
  geom_histogram(bins = 10, color = "black", fill = "white") + 
  labs(x = "Probability", y = "Density", title = "Class 3") + 
  scale_y_continuous(expand = c(0, 0), limits = c(0, 7)) + 
  theme(plot.title = element_text(hjust = 0.5))
group4_density <- ggplot(voter2019, aes(x = Class4, y = ..density..)) + 
  geom_histogram(bins = 10, color = "black", fill = "white") + 
  labs(x = "Probability", y = "Density", title = "Class 4") + 
  scale_y_continuous(expand = c(0, 0), limits = c(0, 7)) + 
  theme(plot.title = element_text(hjust = 0.5))
ggarrange(group1_density, group2_density, 
          group3_density, group4_density, nrow = 1)
